# Divert console output to the log file for the rest of the script; with
# append=FALSE an existing log is overwritten. The diversion is closed by the
# bare sink() call at the end of the script.
sink("survey.benchmark-psrm-log.txt",append=FALSE,type="output")

##################
####2016 ANES
##################

library(plyr)
library(SDMTools)

# Linearly rescale `data` so that its smallest non-missing value maps to 0 and
# its largest to 1. Missing values are ignored when locating the two bounds
# and remain missing in the result.
rescale <- function(data){
	bounds <- range(data,na.rm=TRUE)
	(data-bounds[1])/(bounds[2]-bounds[1])
}

#Load ANES
# The .RData file is expected to define the data frame `anes`.
load('anes-psrm.RData')

# Web-mode respondents: copy V160102w into the common weight column, then keep
# only the benchmark variables plus that weight.
anes.online <- subset(anes,V160501=='2. Web')
anes.online$weight_pulse <- anes.online$V160102w
anes.online <- anes.online[,c(
	'income','age','female',
	'black','hispanic','white','other.race',
	'college.plus','some.college','high.school.less',
	'northeast','south','midwest','west',
	'party.7pt','dem.partisanship.3pt','rep.partisanship.3pt','ind.partisanship.3pt','strong.partisan',
	'liberal','conservative','moderate',
	'primary.turnout','general.turnout',
	'syria.refugees','strong.syria','abortion','strong.abortion',
	'tea.party.therm','trump.therm','obama.therm','clinton.therm',
	'clinton.post',
	'weight_pulse'
)]

# FTF/CASI-mode respondents: same treatment with V160102f as the weight.
# `anes` is overwritten here, so the web subset above must be taken first.
anes <- subset(anes,V160501=='1. FTF/CASI')
anes$weight_pulse <- anes$V160102f
# Same columns, in the same order, as the web subset.
anes <- anes[,names(anes.online)]

##Pre-Analysis Survey
# The .RData file is expected to define the data frame `pre.survey`.
load('pre.survey-psrm.RData')
# Keep respondents that have a weight, and align the columns (and their order)
# with the ANES frame so the samples can be stacked later.
pre.survey <- subset(pre.survey,!is.na(weight_pulse))[,names(anes)]


###
#COMBINED FOR SCALING PURPOSES
###
# Label each sample, stack all three, and standardise the non-binary variables
# on the pooled data so that every sample is expressed on one common scale.
pre.survey$survey <- 'panel'
anes$survey <- 'anes'
anes.online$survey <- 'anes.online'
combined <- rbind(pre.survey,anes,anes.online)

for(scaled.var in c('party.7pt','income','age','syria.refugees','abortion','tea.party.therm','trump.therm','obama.therm','clinton.therm')){
	combined[[scaled.var]] <- scale(x=combined[[scaled.var]])
}

# Split the pooled, rescaled data back into the three samples.
pre.survey <- subset(combined,survey=='panel')
anes <- subset(combined,survey=='anes')
anes.online <- subset(combined,survey=='anes.online')

# Weighted mean, weighted variance and non-missing sample size of every
# benchmark variable in each of the three samples (panel, ANES, ANES online).
var.comparison <- names(anes)

# The last two columns (weight_pulse, survey) are bookkeeping, not benchmarks.
n.compared <- max(length(var.comparison)-2,0)

# Preallocate the per-variable result vectors.
weighted.proportion.panel <- numeric(n.compared)
weighted.proportion.anes <- numeric(n.compared)
weighted.proportion.anes.online <- numeric(n.compared)
weighted.variance.panel <- numeric(n.compared)
weighted.variance.anes <- numeric(n.compared)
weighted.variance.anes.online <- numeric(n.compared)
panel.sample <- integer(n.compared)
anes.sample <- integer(n.compared)
anes.online.sample <- integer(n.compared)

# seq_len() yields an empty loop when there is nothing to compare
# (1:0 would iterate over 1 and 0).
for(k in seq_len(n.compared)){
	current.var <- var.comparison[k]

	# Rows with an observed value of the current variable, per sample.
	observed.panel <- which(!is.na(pre.survey[,current.var]))
	observed.anes <- which(!is.na(anes[,current.var]))
	observed.anes.online <- which(!is.na(anes.online[,current.var]))

	weighted.proportion.panel[k] <- weighted.mean(x=pre.survey[,current.var],w=pre.survey$weight_pulse,na.rm=TRUE)
	weighted.proportion.anes[k] <- weighted.mean(x=anes[,current.var],w=anes$weight_pulse,na.rm=TRUE)
	weighted.proportion.anes.online[k] <- weighted.mean(x=anes.online[,current.var],w=anes.online$weight_pulse,na.rm=TRUE)

	# wt.var() is given only the observed rows and their matching weights.
	weighted.variance.panel[k] <- wt.var(x=pre.survey[observed.panel,current.var],wt=pre.survey$weight_pulse[observed.panel])
	weighted.variance.anes[k] <- wt.var(x=anes[observed.anes,current.var],wt=anes$weight_pulse[observed.anes])
	weighted.variance.anes.online[k] <- wt.var(x=anes.online[observed.anes.online,current.var],wt=anes.online$weight_pulse[observed.anes.online])

	panel.sample[k] <- length(observed.panel)
	anes.sample[k] <- length(observed.anes)
	anes.online.sample[k] <- length(observed.anes.online)
}

# One summary table per sample; later code reads these columns by position:
# 1 = names, 2 = mean, 3 = variance, 4 = sample.size.
compared.names <- var.comparison[seq_len(n.compared)]

panel.proportion <- data.frame(names=compared.names,mean=weighted.proportion.panel,variance=weighted.variance.panel,sample.size=panel.sample,stringsAsFactors=FALSE)

anes.proportion <- data.frame(names=compared.names,mean=weighted.proportion.anes,variance=weighted.variance.anes,sample.size=anes.sample,stringsAsFactors=FALSE)

anes.online.proportion <- data.frame(names=compared.names,mean=weighted.proportion.anes.online,variance=weighted.variance.anes.online,sample.size=anes.online.sample,stringsAsFactors=FALSE)

###
###ANES to Toolbar Sample
###
# Compare each benchmark variable between the panel and the ANES FTF/CASI
# sample. The summary tables hold one row per variable, so every quantity is
# computed for all variables at once instead of inside an index loop.
category <- as.character(panel.proportion$names)
panel.value <- panel.proportion$mean
anes.value <- anes.proportion$mean
difference.value <- panel.value - anes.value

# Standard error of the difference in means, sqrt(var1/n1 + var2/n2).
# Despite its name, panel.standard.error holds the SE of the difference.
panel.standard.error <- sqrt((anes.proportion$variance/anes.proportion$sample.size) + (panel.proportion$variance/panel.proportion$sample.size))

# Two-sided t test; the degrees of freedom are the smaller of the two
# non-missing sample sizes for each variable.
ttest <- difference.value/panel.standard.error
df.current <- pmin(panel.proportion$sample.size,anes.proportion$sample.size)
p.value <- 2*(1 - pt(q=abs(ttest),df=df.current,lower.tail=TRUE))

# Results table, rounded to two decimals.
out.frame <- cbind.data.frame(category,round(panel.value,2),round(anes.value,2),round(difference.value,2),round(panel.standard.error,2),round(ttest,digits=2),round(p.value,digits=2))
names(out.frame) <- c('category','panel','anes','difference','difference.se','t.stat','p.value')

# Display labels for the plots, in the same order as the rows of out.frame.
var.labels <- c("Income","Age","Female","Black","Hispanic","White","Other Race","College/More","Some College","High School/Less","Northeast","South","Midwest","West","PID Scale (7-pt)","Democrat","Republican","Independent","Strong Partisan","Liberal","Conservative","Moderate","Primary Turnout","General Turnout","Refugees","Strong Stance","Abortion","Strong Stance","Tea Party","Trump","Obama","Clinton","Clinton Vote")

###
###ANES Online to Toolbar Sample
###
# Compare each benchmark variable between the panel and the ANES web sample.
# Everything is stored in the .online vectors, so the vectors built for the
# face-to-face comparison (category, panel.value, ...) are left untouched.
category.online <- as.character(panel.proportion$names)
panel.value.online <- panel.proportion$mean
anes.value.online <- anes.online.proportion$mean
difference.value.online <- panel.value.online - anes.value.online

# Standard error of the difference in means, sqrt(var1/n1 + var2/n2).
# Despite its name, panel.standard.error.online holds the SE of the difference.
panel.standard.error.online <- sqrt((anes.online.proportion$variance/anes.online.proportion$sample.size) + (panel.proportion$variance/panel.proportion$sample.size))

# Two-sided t test; the degrees of freedom are the smaller of the two
# non-missing sample sizes for each variable.
ttest.online <- difference.value.online/panel.standard.error.online
df.current <- pmin(panel.proportion$sample.size,anes.online.proportion$sample.size)
p.value.online <- 2*(1 - pt(q=abs(ttest.online),df=df.current,lower.tail=TRUE))

# Results table, rounded to two decimals.
out.frame.online <- cbind.data.frame(category.online,round(panel.value.online,2),round(anes.value.online,2),round(difference.value.online,2),round(panel.standard.error.online,2),round(ttest.online,digits=2),round(p.value.online,digits=2))
names(out.frame.online) <- c('category','panel','anes','difference','difference.se','t.stat','p.value')

###
###Plot ANES to Toolbar Comparison
###
# Dot plot written to figure-a4.pdf: one row per benchmark variable (first
# variable at the top), a point at the panel-minus-ANES difference and a
# horizontal bar reaching 2 standard errors of the difference on either side.
n.rows <- length(out.frame$difference)
row.pos <- n.rows:1
# Heights of the heavy rules that separate the variable groups.
group.breaks <- c(30.5,26.5,23.5,19.5,14.5,11.5,9.5,5.5,1.5)

pdf(file='figure-a4.pdf',height=10,width=10.75)
par(mar=c(5,8,4,5.5))
plot(
	x=out.frame$difference,
	y=row.pos,
	xlim=c(-.4,.4),
	pch=16,
	yaxt='n',
	xlab="Difference (Toolbar Sample - NES Sample)
Standard Deviations for Non-Binary Variables",
	ylab='',
	main="Differences in Sample Composition Relative to NES Sample",
	cex=1.5,
	cex.main=2,
	cex.axis=1.2,
	cex.lab=1.2
)
# Interval bars around each point.
segments(
	y0=row.pos,
	y1=row.pos,
	x0=out.frame$difference + 2*out.frame$difference.se,
	x1=out.frame$difference - 2*out.frame$difference.se,
	lwd=4
)
# Variable labels on the left, dashed reference line at zero difference.
axis(side=2,at=row.pos,labels=var.labels,las=1)
abline(v=0,lty=2,lwd=2)
# Dotted guide line through every row.
segments(y0=row.pos,y1=row.pos,x0=rep(-1.5,n.rows),x1=rep(1.5,n.rows),lty=3)
# Heavy rules between groups, with the group names on the right-hand axis.
segments(y0=group.breaks,y1=group.breaks,x0=rep(-1.5,length(group.breaks)),x1=rep(1.5,length(group.breaks)),lwd=2.5)
axis(
	side=4,
	at=c(1,3.5,7.5,10.5,13,17,21.5,25,28.5,32),
	labels=c('Vote','Therm','Issue','Turnout','Ideology','Partisan','Region','Education','Race','Demos'),
	las=1,
	cex.axis=1.2
)
dev.off()

###
###Plot ANES Online to Toolbar Comparison
###
# Dot plot written to figure-a5.pdf: one row per benchmark variable (first
# variable at the top), a point at the panel-minus-ANES-online difference and a
# horizontal bar reaching 2 standard errors of the difference on either side.
n.rows <- length(out.frame.online$difference)
row.pos <- n.rows:1
# Heights of the heavy rules that separate the variable groups.
group.breaks <- c(30.5,26.5,23.5,19.5,14.5,11.5,9.5,5.5,1.5)

pdf(file='figure-a5.pdf',height=10,width=10.75)
par(mar=c(5,8,4,5.5))
plot(
	x=out.frame.online$difference,
	y=row.pos,
	xlim=c(-.4,.4),
	pch=16,
	yaxt='n',
	xlab="Difference (Toolbar Sample - Online NES Sample)
Standard Deviations for Non-Binary Variables",
	ylab='',
	main="Differences in Sample Composition Relative to Online NES",
	cex=1.5,
	cex.main=2,
	cex.axis=1.2,
	cex.lab=1.2
)
# Interval bars around each point.
segments(
	y0=row.pos,
	y1=row.pos,
	x0=out.frame.online$difference + 2*out.frame.online$difference.se,
	x1=out.frame.online$difference - 2*out.frame.online$difference.se,
	lwd=4
)
# Variable labels on the left, dashed reference line at zero difference.
axis(side=2,at=row.pos,labels=var.labels,las=1)
abline(v=0,lty=2,lwd=2)
# Dotted guide line through every row.
segments(y0=row.pos,y1=row.pos,x0=rep(-1.5,n.rows),x1=rep(1.5,n.rows),lty=3)
# Heavy rules between groups, with the group names on the right-hand axis.
segments(y0=group.breaks,y1=group.breaks,x0=rep(-1.5,length(group.breaks)),x1=rep(1.5,length(group.breaks)),lwd=2.5)
axis(
	side=4,
	at=c(1,3.5,7.5,10.5,13,17,21.5,25,28.5,32),
	labels=c('Vote','Therm','Issue','Turnout','Ideology','Partisan','Region','Education','Race','Demos'),
	las=1,
	cex.axis=1.2
)
dev.off()

############################################################
############################################################
####2016 CCES
############################################################
############################################################

# The .RData file is expected to define the data frame `cces2016`; its id
# column is dropped before the comparison.
load('cces2016-psrm.RData')
cces2016$id <- NULL

# Reload the panel survey (the copy in memory was rescaled against the ANES
# above), keep weighted respondents and align its columns with the CCES frame.
load('pre.survey-psrm.RData')
pre.survey <- subset(pre.survey,!is.na(weight_pulse))[,names(cces2016)]

###
# Label each sample, stack them, and standardise the non-binary variables on
# the pooled data so that both samples share one scale.
pre.survey$survey <- 'panel'
cces2016$survey <- 'cces'
combined <- rbind(pre.survey,cces2016)

for(scaled.var in c('party.7pt','income','age','political.interest')){
	combined[[scaled.var]] <- scale(x=combined[[scaled.var]],scale=TRUE)
}

# Split the pooled, rescaled data back into the two samples.
pre.survey <- subset(combined,survey=='panel')
cces2016 <- subset(combined,survey=='cces')

###
###
###
# Weighted mean, weighted variance and non-missing sample size of every
# benchmark variable in the panel and in the CCES.
var.comparison <- names(cces2016)

# The last two columns are skipped, as in the original loop bound.
# NOTE(review): this assumes they are weight_pulse and survey -- confirm the
# column order of cces2016.
n.compared <- max(length(var.comparison)-2,0)

# Preallocate the per-variable result vectors.
weighted.proportion.panel <- numeric(n.compared)
weighted.proportion.cces <- numeric(n.compared)
weighted.variance.panel <- numeric(n.compared)
weighted.variance.cces <- numeric(n.compared)
panel.sample <- integer(n.compared)
cces.sample <- integer(n.compared)

# seq_len() yields an empty loop when there is nothing to compare
# (1:0 would iterate over 1 and 0).
for(k in seq_len(n.compared)){
	current.var <- var.comparison[k]

	# Rows with an observed value of the current variable, per sample.
	observed.panel <- which(!is.na(pre.survey[,current.var]))
	observed.cces <- which(!is.na(cces2016[,current.var]))

	weighted.proportion.panel[k] <- weighted.mean(x=pre.survey[,current.var],w=pre.survey$weight_pulse,na.rm=TRUE)
	weighted.proportion.cces[k] <- weighted.mean(x=cces2016[,current.var],w=cces2016$weight_pulse,na.rm=TRUE)

	# wt.var() is given only the observed rows and their matching weights.
	weighted.variance.panel[k] <- wt.var(x=pre.survey[observed.panel,current.var],wt=pre.survey$weight_pulse[observed.panel])
	weighted.variance.cces[k] <- wt.var(x=cces2016[observed.cces,current.var],wt=cces2016$weight_pulse[observed.cces])

	panel.sample[k] <- length(observed.panel)
	cces.sample[k] <- length(observed.cces)
}

# One summary table per sample; later code reads these columns by position:
# 1 = names, 2 = mean, 3 = variance, 4 = sample.size.
compared.names <- var.comparison[seq_len(n.compared)]

panel.proportion <- data.frame(names=compared.names,mean=weighted.proportion.panel,variance=weighted.variance.panel,sample.size=panel.sample,stringsAsFactors=FALSE)

cces.proportion <- data.frame(names=compared.names,mean=weighted.proportion.cces,variance=weighted.variance.cces,sample.size=cces.sample,stringsAsFactors=FALSE)

###
###CCES to Toolbar Sample
###
# Compare each benchmark variable between the panel and the CCES. The summary
# tables hold one row per variable, so every quantity is computed for all
# variables at once instead of inside an index loop.
category <- as.character(panel.proportion$names)
panel.value <- panel.proportion$mean
cces.value <- cces.proportion$mean
difference.value <- panel.value - cces.value

# Standard error of the difference in means, sqrt(var1/n1 + var2/n2).
# Despite its name, panel.standard.error holds the SE of the difference.
panel.standard.error <- sqrt((cces.proportion$variance/cces.proportion$sample.size) + (panel.proportion$variance/panel.proportion$sample.size))

# Two-sided t test; the degrees of freedom are the smaller of the two
# non-missing sample sizes for each variable.
ttest <- difference.value/panel.standard.error
df.current <- pmin(panel.proportion$sample.size,cces.proportion$sample.size)
p.value <- 2*(1 - pt(q=abs(ttest),df=df.current,lower.tail=TRUE))

# Results table, rounded to two decimals.
out.frame <- cbind.data.frame(category,round(panel.value,2),round(cces.value,2),round(difference.value,2),round(panel.standard.error,2),round(ttest,digits=2),round(p.value,digits=2))
names(out.frame) <- c('category','panel','cces','difference','difference.se','t.stat','p.value')

# Display labels for the plot, in the same order as the rows of out.frame.
var.labels <- c("Income","Age","Female","Black","Hispanic","White","Other Race","College/More","Some College","High School/Less","Northeast","South","Midwest","West","PID Scale (7-pt)","Democrat","Republican","Independent","Strong Partisan","Liberal","Conservative","Moderate","Primary Turnout","General Turnout","Political Interest","Clinton Vote")

# Dot plot written to figure-a6.pdf: one row per benchmark variable (first
# variable at the top), a point at the panel-minus-CCES difference and a
# horizontal bar reaching 2 standard errors of the difference on either side.
n.rows <- length(out.frame$difference)
row.pos <- n.rows:1
# Heights of the heavy rules that separate the variable groups.
group.breaks <- c(23.5,19.5,16.5,12.5,7.5,4.5,1.5)

pdf(file='figure-a6.pdf',height=10,width=10.75)
par(mar=c(5,8,4,5.5))
plot(y=row.pos,x=out.frame$difference,xlim=c(-.4,.4),pch=16,yaxt='n',xlab="Difference (Toolbar Sample - CCES Sample)
Standard Deviations for Non-Binary Variables",ylab='',main="Differences in Sample Composition Relative to CCES Sample",cex=1.5,cex.main=2,cex.axis=1.2,cex.lab=1.2)
# Interval bars around each point.
segments(y0=row.pos,y1=row.pos,x0=out.frame$difference + 2*out.frame$difference.se,x1=out.frame$difference - 2*out.frame$difference.se,lwd=4)
# Variable labels on the left, dashed reference line at zero difference.
axis(side=2,at=row.pos,labels=var.labels,las=1)
abline(v=0,lty=2,lwd=2)
# Dotted guide line through every row.
segments(y0=row.pos,y1=row.pos,x0=rep(-1.5,n.rows),x1=rep(1.5,n.rows),lty=3)
# Heavy rules between groups. The end points are built with rep() so there is
# exactly one -1.5 / 1.5 pair per rule; nothing is left to argument recycling.
segments(y0=group.breaks,y1=group.breaks,x0=rep(-1.5,length(group.breaks)),x1=rep(1.5,length(group.breaks)),lwd=2.5)
# Group names on the right-hand axis.
axis(side=4,at=c(1,3,6,10,14.5,18,21.5,25),labels=c('Vote','Interest','Ideology','Partisan','Region','Education','Race','Demos'),las=1,cex.axis=1.2)
dev.off()

# Close the output diversion opened by the sink(...) call on the first line of
# the script, so console output is no longer written to the log file.
sink()